import os
import re
import subprocess
import whisper
import yt_dlp
from flask import Flask, request, render_template_string, send_file

# Flask application object; the route decorators below register on it.
app = Flask(__name__)
# Whisper "base" model, loaded once at import time and reused by every request.
model = whisper.load_model("base")

# Directory holding the ffmpeg/ffprobe executables. It is used to build the
# ffprobe command and passed to yt-dlp as 'ffmpeg_location'.
FFMPEG_BIN = r"C:\ffmpeg\bin"  # ⚠️ Change this if needed

# Folder where finished transcripts are stored as "<video_id>.txt".
# The path is relative, so it resolves against the current working directory.
TRANSCRIPT_DIR = "transcripts"
os.makedirs(TRANSCRIPT_DIR, exist_ok=True)  # Ensure transcript folder exists

# Jinja template for the single page served by index(). It is rendered with
# three variables: `transcript`, `error` and `video_id`.
#   - A non-empty `transcript` shows the success banner and the video ID.
#   - Otherwise a non-empty `error` shows the error message in red.
# The inline transcript and the download button are wrapped in an HTML comment,
# so they are still rendered into the page source but not displayed.
HTML_TEMPLATE = '''
<!doctype html>
<title>YouTube Transcriber</title>
<h1>YouTube Audio Transcriber</h1>
<form method="post">
  YouTube URL: <input type="text" name="url" required size="60">
  <input type="submit" value="Transcribe">
</form>
{% if transcript %}
<h2>Transcript downloaded!</h2>
<p>Transcript for video ID <strong>{{ video_id }}</strong></p>
  <!--<h2>Transcript:</h2>
  <pre style="white-space: pre-wrap;">{{ transcript }}</pre>
  <form action="/download/{{ video_id }}" method="get">
    <button type="submit">Download Transcript (.txt)</button>
  </form>-->
{% elif error %}
  <p style="color:red;"><strong>Error:</strong> {{ error }}</p>
{% endif %}
'''

def extract_video_id(url):
    """Extract the 11-character YouTube video ID from a URL.

    Recognised forms are the ``v=`` query parameter, ``youtu.be/<id>`` short
    links and the ``/shorts/``, ``/embed/``, ``/live/`` and ``/v/`` paths.

    Args:
        url: The URL text to inspect. Anything that is not a string is
            treated as "no ID" instead of raising.

    Returns:
        The video ID, or None when no ID can be found.
    """
    if not isinstance(url, str):
        return None
    # \b keeps "v=" from matching the tail of another parameter name
    # (e.g. "rv=" or "list_v="). re.ASCII restricts \w to [A-Za-z0-9_],
    # which matters because the ID is later used as a file name.
    match = re.search(
        r'(?:\bv=|youtu\.be/|/(?:shorts|embed|live|v)/)([\w-]{11})',
        url,
        flags=re.ASCII,
    )
    return match.group(1) if match else None

def get_audio_duration(filename):
    """Return the duration of a media file in seconds, as reported by ffprobe.

    This is a best-effort probe: any failure to run ffprobe or to parse its
    output is logged and reported as a duration of 0.0 instead of raising.

    Args:
        filename: Path of the media file to probe.

    Returns:
        The duration in seconds as a float, or 0.0 when it cannot be
        determined.
    """
    command = [
        os.path.join(FFMPEG_BIN, 'ffprobe'), '-v', 'error', '-show_entries',
        'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', filename,
    ]
    try:
        # Capture stderr separately: diagnostics mixed into stdout would make
        # the float() parse fail even when a valid duration was printed.
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        print(f"[ffprobe error] {e}")
        return 0.0

    raw_duration = result.stdout.strip()
    try:
        return float(raw_duration)
    except ValueError:
        details = result.stderr.strip()
        print(f"[ffprobe error] could not parse duration {raw_duration!r}: {details}")
        return 0.0

def _discard_file(path):
    """Best-effort removal of a rejected download; logs instead of raising."""
    try:
        os.remove(path)
    except OSError as exc:
        print(f"[WARN] Could not remove {path}: {exc}")


def download_audio(video_url, video_id):
    """Download a video's audio track as ``<video_id>.mp3`` using yt-dlp.

    The MP3 is written to the current working directory. Before its name is
    returned it is checked to exist, to be non-empty and to have a non-zero
    duration according to ffprobe. A file that fails those checks is deleted
    so failed attempts do not pile up on disk.

    Args:
        video_url: URL handed to yt-dlp.
        video_id: Identifier used as the base name of the output file.

    Returns:
        The name of the downloaded MP3 file.

    Raises:
        RuntimeError: yt-dlp failed, or the resulting file is empty or has
            zero (or unreadable) duration.
        FileNotFoundError: yt-dlp finished but the expected MP3 is missing.
    """
    output_filename = f"{video_id}.mp3"
    print(f"[INFO] Downloading audio as {output_filename}...")

    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': f'{video_id}.%(ext)s',
        # A "watch?v=...&list=..." URL would otherwise fetch the whole
        # playlist, with every entry written to the same output name.
        'noplaylist': True,
        'quiet': False,
        'no_warnings': True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'ffmpeg_location': FFMPEG_BIN,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])
    except Exception as e:
        # Chain the cause so the original yt-dlp traceback is preserved.
        raise RuntimeError(f"yt-dlp failed: {e}") from e

    if not os.path.exists(output_filename):
        raise FileNotFoundError(f"Audio file {output_filename} not found.")

    if os.path.getsize(output_filename) == 0:
        _discard_file(output_filename)
        raise RuntimeError(f"Downloaded audio file {output_filename} is empty.")

    duration = get_audio_duration(output_filename)
    print(f"[INFO] Audio duration: {duration:.2f} seconds")

    # get_audio_duration() reports 0.0 when ffprobe fails or its output
    # cannot be parsed, so this also rejects unreadable files.
    if duration <= 0.0:
        _discard_file(output_filename)
        raise RuntimeError(f"Audio file {output_filename} has 0 duration or is invalid.")

    return output_filename

@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the form and, on POST, download and transcribe the given video.

    On POST the submitted ``url`` field is parsed for a video ID, the audio
    is downloaded, transcribed with the module-level Whisper model, and the
    text is saved to ``<TRANSCRIPT_DIR>/<video_id>.txt``. The temporary audio
    file is removed whether or not transcription succeeds.

    Returns:
        The rendered page, showing either the success banner or an error.
    """
    transcript = ""
    error = ""
    video_id = ""
    if request.method == 'POST':
        # .get() turns a missing field into the normal "invalid URL" message.
        url = request.form.get('url', '').strip()
        video_id = extract_video_id(url)
        if not video_id:
            error = "Invalid YouTube URL"
        else:
            audio_file = None
            try:
                audio_file = download_audio(url, video_id)
                print(f"[INFO] Transcribing {audio_file}...")
                result = model.transcribe(audio_file)
                text = result['text']

                # Save transcript to transcripts folder
                transcript_path = os.path.join(TRANSCRIPT_DIR, f"{video_id}.txt")
                with open(transcript_path, "w", encoding="utf-8") as f:
                    f.write(text)

                # Only report success once the transcript is actually on disk;
                # otherwise the template would show the success banner and
                # hide the error.
                transcript = text
            except Exception as e:
                error = str(e)
            finally:
                # Always clean up the audio, including after a failed
                # transcription or save. A cleanup failure is logged only and
                # must not turn a finished transcript into an error.
                if audio_file is not None:
                    try:
                        os.remove(audio_file)
                    except OSError as cleanup_error:
                        print(f"[WARN] Could not remove {audio_file}: {cleanup_error}")
                    else:
                        if error:
                            print(f"[INFO] Removed {audio_file} after failure.")
                        else:
                            print("[INFO] Transcription complete. Audio file removed.")
    return render_template_string(HTML_TEMPLATE, transcript=transcript, error=error, video_id=video_id)

@app.route('/download/<video_id>')
def download(video_id):
    """Send the saved transcript for ``video_id`` as a file attachment.

    Args:
        video_id: Video ID taken from the URL path. It is untrusted input.

    Returns:
        The transcript file as an attachment, or a 404 response when the ID
        is malformed or no transcript exists for it.
    """
    # Only accept something shaped like a video ID. The route converter blocks
    # "/" but not "\" or "..", which could escape TRANSCRIPT_DIR on Windows,
    # and the raw value would otherwise be echoed into the response below.
    if not re.fullmatch(r'[A-Za-z0-9_-]{11}', video_id):
        return "Invalid video ID.", 404

    transcript_path = os.path.join(TRANSCRIPT_DIR, f"{video_id}.txt")
    if os.path.isfile(transcript_path):
        # Pass an absolute path so send_file serves the same file that was
        # just checked, whatever directory it resolves relative paths against.
        return send_file(os.path.abspath(transcript_path), as_attachment=True)
    return f"Transcript file {transcript_path} not found.", 404

# Start Flask's built-in development server when this file is run directly.
# NOTE(review): debug=True enables the interactive debugger and the reloader;
# this is meant for local development only — confirm it is never used for a
# publicly reachable deployment.
if __name__ == '__main__':
    app.run(debug=True)
